# ------------------------------------------------------------------------------
# Builds OOS models/predictions for train set.
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bsub -q big -R "rusage[mem=100000]" make
# ------------------------------------------------------------------------------

include config.mk

# Input variable defaults ------------------------------------------------------
# Both knobs can be overridden from the command line:
#   make SPLIT={split} RESTRICTION={restriction}
#
#   SPLIT        one of: random, overnight
#   RESTRICTION  one of: all, dropcc, justcc, dem, enc
SPLIT       := random
RESTRICTION := all

# Directory targets ------------------------------------------------------------
# One `.f` stamp file per entry of MODELING_DIRS, plus one for the log directory.
DIRECTORIES := $(addsuffix /.f,$(MODELING_DIRS) log)

# Model targets ----------------------------------------------------------------
# Each list holds one .rds path per entry of OUTCOMES, named
# <family>__<outcome>__5.rds. Fitted models live under $(MODELS); GBM tuning
# results live under $(TUNING).
LASSOS         := $(addprefix $(MODELS)/lasso__,$(addsuffix __5.rds,$(OUTCOMES)))
TUNING_RESULTS := $(addprefix $(TUNING)/gbm__,$(addsuffix __5.rds,$(OUTCOMES)))
GBMS           := $(addprefix $(MODELS)/gbm__,$(addsuffix __5.rds,$(OUTCOMES)))

# Ensembling targets -----------------------------------------------------------
# SUBS      : one subscore file per fold (folds 1 through 5).
# ENSEMBLES : one ensemble model file per entry of OUTCOMES.
# SCORES    : the single prediction file for the train set.
SUBS      := $(addprefix $(SUBSCORES)/subscores__,$(addsuffix .rds,1 2 3 4 5))
ENSEMBLES := $(addprefix $(MODELS)/ensemble__,$(addsuffix __5.rds,$(OUTCOMES)))
SCORES    := $(PREDICTION)/scores_train_set.rds

# Recipes ----------------------------------------------------------------------
## all		: Constructs all modeling targets.
# Convenience goal: it has no recipe of its own and only pulls in `scores`.
all : scores
.PHONY : all

## pre_cohorts : Creates original train/test/val cohort files with folds.
# The cohort files are not produced by this Makefile: whenever one is needed,
# the `prep` goal of the Makefile in ../01_primary_models is invoked instead.
.PHONY : pre_cohorts
pre_cohorts : $(addprefix $(PRE_COHORTS)/,train_cohort.rds test_cohort.rds val_cohort.rds)

$(PRE_COHORTS)/%.rds : \
    ../01_primary_models/01_prep_modeling_data.sh \
    ../01_primary_models/scripts/01_prep_modeling_data.R
	$(MAKE) --directory=../01_primary_models prep

## prep			: Creates ensembling sets within folds of train cohort.
# Each cohort file is built by running 01_create_ensembles.sh with SPLIT as its
# only argument; it is rebuilt when that script or its R counterpart changes.
#
# Everything after `|` is an order-only prerequisite: the directory stamp files
# and the pre-cohort directory must exist before the recipe runs, but their
# timestamps never force a rebuild. A directory's mtime changes whenever an
# entry inside it is added or removed, so listing it as a normal prerequisite
# caused spurious rebuilds of every cohort file (and everything downstream).
# NOTE(review): if cohorts should be rebuilt whenever the pre-cohort files
# change, list those files as normal prerequisites instead -- confirm intent.
.PHONY : prep
prep : $(DOWNSAMPLE_COHORTS)
$(COHORTS)/%.rds : 01_create_ensembles.sh scripts/01_create_ensembles.R \
| $(DIRECTORIES) $(PRE_COHORTS)
	bash $< $(SPLIT)

## lassos		: Builds LASSO models.
# The pattern stem is the outcome name. A model is refit when the fitting
# scripts, the outcome's config in ../model_config, or any downsampled cohort
# is newer than the model file. The script receives: stem, SPLIT, RESTRICTION.
.PHONY : lassos
lassos : $(LASSOS)

$(MODELS)/lasso__%__5.rds : \
    02_fit_lasso.sh \
    scripts/02_fit_lasso.R \
    ../model_config/%.yml \
    $(DOWNSAMPLE_COHORTS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## tune			: Tunes GBM models.
# The pattern stem is the outcome name. Tuning is redone when the tuning
# scripts, the outcome's config in ../model_config, or any downsampled cohort
# is newer than the result file. The script receives: stem, SPLIT, RESTRICTION.
.PHONY : tune
tune : $(TUNING_RESULTS)

$(TUNING)/gbm__%__5.rds : \
    03_tune_gbm.sh \
    scripts/03_tune_gbm.R \
    ../model_config/%.yml \
    $(DOWNSAMPLE_COHORTS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## gbms			: Builds GBM models.
# The pattern stem is the outcome name. On top of the fitting scripts, the
# outcome's config and the downsampled cohorts, each model also depends on the
# tuning result for the same outcome, so tuning always runs first.
# The script receives: stem, SPLIT, RESTRICTION.
.PHONY : gbms
gbms : $(GBMS)

$(MODELS)/gbm__%__5.rds : \
    04_fit_gbm.sh \
    scripts/04_fit_gbm.R \
    $(TUNING)/gbm__%__5.rds \
    ../model_config/%.yml \
    $(DOWNSAMPLE_COHORTS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## subscores	: Predicts subscores for all GBMs, LASSOs.
# Static pattern rule: every file in SUBS is matched against
# $(SUBSCORES)/subscores__%.rds, so `$*` in the recipe is the fold number.
# A plain `$(SUBS) : ...` header is an explicit rule, in which `$*` expands to
# the empty string (".rds" is not a known suffix) and the script would be
# called without its fold argument.
# Every fold depends on all LASSO and GBM models.
# The script receives: fold, SPLIT, RESTRICTION.
.PHONY : subscores
subscores : $(SUBS)
$(SUBS) : $(SUBSCORES)/subscores__%.rds : 05_predict_ensemble_components.sh \
scripts/05_predict_ensemble_components.R $(LASSOS) $(GBMS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## ensembles	: Generates ensemble models for all outcomes.
# The pattern stem is the outcome name. An ensemble is retrained when the
# training scripts, the outcome's config in ../ensemble_config, or any of the
# per-fold subscore files is newer than the ensemble file.
# The script receives: stem, SPLIT, RESTRICTION.
.PHONY : ensembles
ensembles : $(ENSEMBLES)

$(MODELS)/ensemble__%__5.rds : \
    06_train_ensemble.sh \
    scripts/06_train_ensemble.R \
    ../ensemble_config/%.yml \
    $(SUBS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## scores 		: Generates predictions from ensemble models.
# Final step of the pipeline: the single score file depends on every ensemble
# model and every subscore file. The script receives: SPLIT, RESTRICTION.
.PHONY : scores
scores : $(SCORES)

$(PREDICTION)/scores_train_set.rds : \
    07_predict_ensemble.sh \
    scripts/07_predict_ensemble.R \
    $(ENSEMBLES) \
    $(SUBS)
	bash $< $(SPLIT) $(RESTRICTION)

# Config targets ---------------------------------------------------------------
## dirs 		: Builds all directories.
.PHONY : dirs
dirs : $(DIRECTORIES)

# Stamp rule: create the directory that should hold the `.f` file, then create
# the (empty) stamp itself so make has a regular file to track.
%/.f :
	mkdir -p $(@D)
	touch $@

## clean		: Deletes all models.
# Removes everything underneath $(MODELING). The first recipe line aborts make
# when MODELING is unset or blank; without it the command would expand to
# `rm -rf /*`. When MODELING has a value the guard expands to nothing.
.PHONY : clean
clean :
	$(if $(strip $(MODELING)),,$(error MODELING is empty - refusing to run rm -rf))
	rm -rf $(MODELING)/*

# Prints every line of this makefile that starts with `##`, with the marker
# stripped. The prerequisite is the first makefile make read (this file, unless
# the MAKEFILES environment variable preloads others). The previous hard-coded
# prerequisite `Makefiles` named a file that does not exist, so `make help`
# stopped with "No rule to make target".
.PHONY : help
help : $(firstword $(MAKEFILE_LIST))
	@sed -n 's/^##//p' $<
